/*----------------------------------------------------------------------*/
/* PROGRAM: bootstrap_statistics_v0_1.ado				*/  
/*									*/
/* PURPOSE:								*/
/* [*]	Given a dataset with bootstrapped estimates, this code outputs	*/
/*	the point estimates, bootstrapped standard errors (SE), the	*/
/*	bootstrapped 95% confidence interval (CI), 99% CI, 90% CI, the 	*/
/*	bootstrapped p-value stars for the null hypothesis that the 	*/
/*	estimate = 0, and the bootstrapped p-value stars for the null 	*/
/*	hypothesis that the estimate = 1. Testing this second null 	*/
/*	hypothesis is particularly relevant for the decompositions that */
/*	are constructed in this paper, but might not be useful 		*/
/*	elsewhere. The bootstrap statistics are output in Excel.	*/
/*									*/
/* USE ON:								*/
/* [*] 	The code assumes the following structure for the dataset upon 	*/
/*	which it is called: Each column is a different variable (or 	*/
/*	estimate).							*/
/*	Each row contains the bootstrapped values for the		*/
/*	columns from ONE call on a bootstrapped sample. For example, if */
/*	you bootstrap an algorithm 100 times, then the data set you 	*/
/*	apply the bootstrap_statistics procedure to should contain 101 	*/
/*	rows. The reason there are 101 rows is because the 		*/
/*	first row of the data set bootstrap_statistics is called on is 	*/
/*	the non-bootstrapped sample(i.e.,the non-resampled full sample).*/
/*	This is	important because we use the variable values in the 	*/
/*	first row as the estimates that are output in the Excel file.	*/
/*									*/
/* DETAILS:								*/
/* [*] 	The first observation in the data set on which you call 	*/
/*	bootstrap_statistics is assumed	to contain the estimate, i.e., 	*/
/*	the first row should contain the values for the variable 	*/
/*	obtained from the full non-resampled sample.			*/
/* [*] 	The .ado file will issue a note if there are any string 	*/
/*	variables in the data set. It will not compute any statistics 	*/
/*	for string variables.						*/
/* [*] 	The .ado file will issue a note if there are any variables in 	*/
/*	the data set that do not have any non-missing values and will 	*/
/*	not compute statistics for these variables			*/
/* [*]	The .ado file will issue a note as statistics for each variable */
/*	are computed. This note will also list how many values are set 	*/
/*	as missing for that particular variable.			*/
/* [*] 	Currently, there is no functionality built in to compute the 	*/
/*	bootstrap statistics for only a subset of the variables 	*/
/*	included in the input data set. In other words, the .ado file	*/
/*	computes statistics for ALL variables included in the dataset.	*/
/*									*/														
/* INPUTS:								*/
/* [*] 	arg_outpath: The filepath where you want to save the Excel file */
/*	with the output							*/
/* [*] 	arg_outfilename: The filename of the Excel file you want to	*/
/*	output to, WITHOUT a file extension (the code always appends 	*/
/*	".xls" to the name you supply)					*/
/* [*] 	arg_tabname: The tabname in the Excel file you want to output to*/
/*									*/
/* OUTPUTS:								*/
/* All outputs listed below are columns in the Excel file. Each variable*/
/* in the data set is a	separate row.					*/
/* [*] 	VARNAME: The name of the variable for which we compute stats	*/
/* [*] 	ESTIMATE: The point estimate from the bootstrap (i.e.,the point */
/*	estimate on the full sample prior to resampling)		*/
/* [*] 	BOOTSTRAPPED SE: The standard error obtained from bootstrapping.*/
/*	The standard error is defined as the standard deviation of the 	*/
/*	bootstrapped values (rows 2 onward), i.e., excluding the 	*/
/*	non-bootstrapped full-sample estimate in the first row		*/
/* [*] 	LOWER 95% CI: The lower limit of the 95% confidence interval	*/
/*	(the 2.5th percentile of the bootstrapped samples).		*/								
/* [*] 	UPPER 95% CI: The upper limit of the 95% confidence interval 	*/
/*	(the 97.5 percentile of the bootstrapped samples).		*/							
/* [*] 	LOWER 90% CI: The lower limit of the 90% confidence interval 	*/
/*	(the 5th percentile of the bootstrapped samples).		*/
/* [*] 	UPPER 90% CI: The upper limit of the 90% confidence interval 	*/
/*	(the 95th percentile of the bootstrapped samples).		*/
/* [*] 	LOWER 99% CI: The lower limit of the 99% confidence interval 	*/
/*	(the 0.5th percentile of the bootstrapped samples).		*/
/* [*] 	UPPER 99% CI: The upper limit of the 99% confidence interval 	*/
/*	(the 99.5th percentile of the bootstrapped samples).		*/
/* [*] 	CI STARS: The significance stars based on the bootstrapped 	*/
/*	confidence intervals. Each bootstrapped confidence interval is 	*/
/*	checked to see whether it contains zero. If a given confidence 	*/
/*	interval does not contain zero, then the estimate is significant*/
/*	at that level. The null hypothesis is H0 = 0. If either the 	*/
/*	lower or upper limit of the CI is equal to zero, then the 	*/
/*	estimate is not	considered to be significant.			*/
/* [*] 	CI STARS H0=1: The significance stars based on the bootstrapped */
/*	confidence intervals for the null hypothesis H0 = 1. If either 	*/
/*	the lower or upper limit of the CI is equal to one, then the 	*/
/*	estimate is not considered to be significant. All of the output */
/*	to Excel is not formatted like it is usually with Stata output 	*/
/*	commands such as estout. The reason for the unformatted output 	*/
/*	is to allow for flexibility of formatting in Excel for the	*/
/*	number of decimal points shown, whether significance stars 	*/
/*	appear on the estimate or on the standard error, etc.		*/
/*									*/
/* EXAMPLE CALL TO PROCEDURE:						*/
/*	local filenm "TEST"						*/
/*	local path "/disk/scratch8/lristovs/oregon_mte"			*/
/*	local tab "test"						*/
/*									*/
/*	use "/disk/scratch8/lristovs/test_data.dta", clear		*/
/*	bootstrap_statistics `path' `filenm' `tab'			*/
/*									*/
/* VERSION CONTROL:							*/
/* v0.1: 	Revised the output such that existing sheets are 	*/
/*		replaced instead of modified				*/
/*----------------------------------------------------------------------*/

capture program drop bootstrap_statistics

* bootstrap_statistics
*
* Writes one row of bootstrap statistics per numeric variable in the data
* set currently in memory to an Excel sheet.
*
* Arguments (positional):
*   arg_outpath      directory in which the Excel file is saved
*   arg_outfilename  file name without extension (".xls" is appended)
*   arg_tabname      sheet name; an existing sheet of that name is replaced
*
* Data layout expected:
*   observation 1   = estimate from the full (non-resampled) sample
*   observations 2+ = one bootstrap replication each
*
* Columns written: VARNAME, ESTIMATE, BOOTSTRAPPED SE (sd of observations
* 2+), percentile limits of the 95%, 90% and 99% CIs, and significance
* stars for H0: estimate = 0 and H0: estimate = 1.
*
* String variables and variables with no non-missing values are skipped
* with a warning. The data in memory are never modified.
program bootstrap_statistics
	args arg_outpath arg_outfilename arg_tabname

	* Statistics are computed for every variable in the data set; there
	* is no option to restrict the output to a subset of variables.
	* NOTE: no -marksample- here. It was unused and only added a temporary
	* variable to the data, which then had to be skipped by a hard-coded name.
	quietly ds
	local temp_varlist "`r(varlist)'"

	* Set up Excel output
	version 14.1: putexcel set "`arg_outpath'/`arg_outfilename'.xls", ///
		sheet("`arg_tabname'", replace) modify

	* The header row does not depend on the variable, so write it once
	version 14.1: putexcel	A1=("VARNAME")		///
			B1=("ESTIMATE")			///
			C1=("BOOTSTRAPPED SE")		///
			D1=("LOWER 95% CI")		///
			E1=("UPPER 95% CI")		///
			F1=("LOWER 90% CI")		///
			G1=("UPPER 90% CI")		///
			H1=("LOWER 99% CI")		///
			I1=("UPPER 99% CI")		///
			J1=("CI STARS")			///
			K1=("CI STARS H0=1")

	* Excel row of the most recently written variable (row 1 = header)
	local temp_row = 1

	foreach v of local temp_varlist {

		* Skip string variables with a note
		capture confirm numeric variable `v'
		if _rc != 0 {
			di " "
			di "WARNING: The variable `v' is not numeric."
			di "No bootstrap statistics will be output for this variable."
			di "Continuing execution."
			di " "
			continue
		}

		* Skip variables that consist of missing values only
		quietly summarize `v'
		if r(N) == 0 {
			di " "
			di "WARNING: The variable `v' has no non-missing values."
			di "No statistics will be output for this variable."
			di " "
			continue
		}

		di " "
		di "NOTE: Outputting bootstrap statistics for variable `v'."

		* Report missing values. missing() also catches the extended
		* missing codes .a-.z, which -summarize- excludes as well.
		* A variable with some missing values is still processed.
		quietly count if missing(`v')
		if r(N) != 0 {
			di "WARNING: The variable `v' contains `r(N)' missing values."
		}

		local ++temp_row

		* Point estimate = value in the first observation. Reading it by
		* subscript needs no preserve/restore and yields "." rather than
		* a syntax error when the first observation is missing.
		local estmt = `v'[1]
		if missing(`estmt') {
			di "WARNING: The variable `v' is missing in the first observation;"
			di "the ESTIMATE column is left empty for this variable."
		}

		* Number of usable bootstrap replications (observations 2 onward).
		* "in 2/L" is only legal when the data have at least two rows.
		local n_boot = 0
		if _N > 1 {
			quietly count if !missing(`v') in 2/L
			local n_boot = r(N)
		}

		if `n_boot' == 0 {
			* Without replications there is no SE or CI, and blank
			* limits must not be turned into significance stars.
			di "WARNING: The variable `v' has no non-missing bootstrap values."
			di "Only the estimate is output for this variable."
			version 14.1: quietly putexcel	A`temp_row'=("`v'")	///
					B`temp_row'=(`estmt')
			continue
		}

		* Bootstrapped standard error: sd over the replications only
		quietly summarize `v' in 2/L
		local se = r(sd)

		* Percentile limits of the 99%, 95% and 90% CIs in one pass.
		* Results come back as r(r1)..r(r6) in the order requested.
		quietly _pctile `v' in 2/L, percentiles(0.5 2.5 5 95 97.5 99.5)
		local ci_99_lower = r(r1)
		local ci_95_lower = r(r2)
		local ci_90_lower = r(r3)
		local ci_90_upper = r(r4)
		local ci_95_upper = r(r5)
		local ci_99_upper = r(r6)

		* Significance stars for H0: estimate = 0 and H0: estimate = 1.
		* The estimate is significant at a level when that level's CI
		* does NOT contain the null value; a limit exactly equal to the
		* null value counts as containing it. The narrowest interval is
		* checked first, so the weakest failing level sets the stars.
		foreach null in 0 1 {
			if `ci_90_lower' <= `null' & `ci_90_upper' >= `null' {
				local stars_`null' ""
			}
			else if `ci_95_lower' <= `null' & `ci_95_upper' >= `null' {
				local stars_`null' "*"
			}
			else if `ci_99_lower' <= `null' & `ci_99_upper' >= `null' {
				local stars_`null' "**"
			}
			else {
				local stars_`null' "***"
			}
		}

		* Write the complete row in a single call (one file update per
		* variable instead of eleven)
		version 14.1: quietly putexcel	A`temp_row'=("`v'")	///
				B`temp_row'=(`estmt')			///
				C`temp_row'=(`se')			///
				D`temp_row'=(`ci_95_lower')		///
				E`temp_row'=(`ci_95_upper')		///
				F`temp_row'=(`ci_90_lower')		///
				G`temp_row'=(`ci_90_upper')		///
				H`temp_row'=(`ci_99_lower')		///
				I`temp_row'=(`ci_99_upper')		///
				J`temp_row'=("`stars_0'")		///
				K`temp_row'=("`stars_1'")
	}

end
